# Replication code for Samii and West
# April 2019
# R v3.5.2
# Notes before running:
# - The packages called below need to be installed before running.
# - Be sure to have a folder called "tables" and another called "figures" 
#   in your working directory.
# - Have the following datasets in your working director:
# -- "bdi2007_noimpute_civ_rebels.dta"
# -- "bdi08_wgt130321.csv"
# -- "bdi08_rebattrition_long.dta"


library(foreign)
library(lmtest)
library(sandwich)
library(locfit)
library(apsrtable)
library(arm)

# Fitting functions

# Cluster robust
robust.se <- function(model, cluster){
  require(sandwich)
  require(lmtest)
  M <- length(unique(cluster))
  N <- length(cluster)
  K <- model$rank
  dfc <- (M/(M - 1)) * ((N - 1)/(N - K))
  uj <- apply(estfun(model), 2, function(x) tapply(x, cluster, sum))
  rcse.cov <- dfc * sandwich(model, meat = crossprod(uj)/N)
  rcse.se <- coeftest(model, rcse.cov)
  return(list(rcse.cov, rcse.se))
}

# WLS with output for apsrtable

wlsClus <- function(yarg, 
                    xspec, 
                    data.arg, 
                    weights.arg, 
                    clus.arg){
  regForm <- as.formula(paste(yarg, "~", 
                              xspec))
  fitout <- lm(  regForm,
                 data=data.arg,
                 weights=data.arg[,weights.arg])
  fitout$se <- robust.se(fitout, data.arg[,clus.arg])[[2]][,2]
  return(fitout)
}

# Data
bdi.sc <- read.dta("bdi2007_noimpute_civ_rebels.dta", convert.underscore=T)
bdi <- subset(bdi.sc, !is.na(casenum))
bdiwgts <- read.csv("bdi08_wgt130321.csv")
bdi$pweight <- bdiwgts$pweight[match(bdi$casenum,bdiwgts$casenum)]

# Trim outlier pweight:
bdi$pweight[bdi$pweight>15000] <- max(bdi$pweight[bdi$pweight<15000]) + bdi$pweight[bdi$pweight>15000]^(3/4)

# Make pre-war wealth index
for(i in c("rp5","rp6","rp7","rp8")){
	bdi[,i][is.na(bdi[,i])] <- 0
}
bdi$wealth <- apply(bdi[,c("rp5","rp6","rp7","rp8")],1,sum)


# Gender descriptives:

bdi.gd <- subset(bdi, hutu==1&dm2>26&dm2<57)
table(bdi.gd$exrebel, bdi.gd$dm1)
table(bdi.gd$exrebel, bdi.gd$dm1)/apply(table(bdi.gd$exrebel, bdi.gd$dm1), 1, sum)

rm(bdi.gd)

# Subsample of Hutu males, aged below the precipitous drop in participation propensity and
# also old enough such arguments about exposure to pre-war discrimination are valid.

# Below is code to produce the results in the appendix section that
# removes the age cut off.  The code below allows you to toggle 
# between turning on/off the age cutoffs for the sample.
# All main results in the paper use the age cut off.
# (Note that these ages are as of 2007, in which case for
# age in 1993, you would have to subtract 14.)
bdi.s <- subset(bdi, hutu==1&dm1==0&dm2>26&dm2<57)
#bdi.s <- subset(bdi, hutu==1&dm1==0)


#Checked balance of dm2 (age) in sample, and min and max are both the same for the exrebel=1/0 samples (min=27, max=60)
# Impute 0 (doesn't change results, and keeps things cleaner)
bdi.s$dm12[bdi.s$dm12==-99] <- 0
bdi.s$dm12[is.na(bdi.s$dm12)] <- 0
bdi.s$job1993[is.na(bdi.s$job1993)] <- 0
bdi.s$job2007[is.na(bdi.s$job2007)] <- 0

# Make transformed version of regressors
bdi.s$fathered <- as.numeric(bdi.s$dm12>0)
bdi.s$age1972 <- bdi.s$dm2 - 35
bdi.s$exrebel2 <- bdi.s$exrebel*.2
bdi.s$plotweight <- NA
bdi.s$plotweight[bdi.s$exrebel==1] <- bdi.s$pweight[bdi.s$exrebel==1]/100
bdi.s$plotweight[bdi.s$exrebel==0] <- (bdi.s$pweight[bdi.s$exrebel==0])/2500
bdi.s$eddiff <- bdi.s$dm11 - bdi.s$fathered

############################
## Descriptive results on forced recruitment
############################
# had friends in the group before joining
1-table(bdi.s$pf22abagenzi)["0"]/sum(table(bdi.s$pf22abagenzi))

# feared harm if didn't joint
table(bdi.s$pf70)["1"]/sum(table(bdi.s$pf70))
# feared harm from
# government 
table(subset(bdi.s, pf70==1)$pf711)["1"]/sum(table(subset(bdi.s, pf70==1)$pf711))
# hutu factions
table(subset(bdi.s, pf70==1)$pf713)["1"]/sum(table(subset(bdi.s, pf70==1)$pf713))

# no one but self took decision
table(bdi.s$pf81)["6"]/sum(table(bdi.s$pf81))
# friends convinced to join
table(bdi.s$pf81)["1"]/sum(table(bdi.s$pf81))

# Wanted to join long before they had actually joined
table(bdi.s$pf87jewe)["1"]/sum(table(bdi.s$pf87jewe))

# Physical mistreatment
# rebels
table(bdi.s$pf96jewe)["1"]/sum(table(bdi.s$pf96jewe))
table(bdi.s$pf96umuntuwo)["1"]/sum(table(bdi.s$pf96umuntuwo))
# government
table(bdi.s$pf99jewe)["1"]/sum(table(bdi.s$pf99jewe))
table(bdi.s$pf99umuntuwo)["1"]/sum(table(bdi.s$pf99umuntuwo))

############################
## Attrition analysis
############################

dthslng <- read.dta("bdi08_rebattrition_long.dta")
dthslng$adm2id <- bdi[match(dthslng$casenum, bdi$casenum),"adm2id"]
dthslng$bdirow <- match(dthslng$casenum, bdi$casenum)
dthslng$hutu <- bdi$hutu[dthslng$bdirow]
dthslng.br <- subset(dthslng, brodec==1&hutu==1&age1972decbro>-8&age1972decbro<21)
# Survey weight that accounts for fact that probability that a death is detected
# is a function of the number of living siblings that a person has
dthslng.br$pweight <- 180000*((bdi$pweight[dthslng.br$bdirow]*(dthslng.br$sibsnow+1))/sum((bdi$pweight[dthslng.br$bdirow]*(dthslng.br$sibsnow+1)))) 
colnames(dthslng.br)[match("age1972decbro",colnames(dthslng.br))] <- "age1972"
colnames(dthslng.br)[match("brodecreb",colnames(dthslng.br))] <- "exrebel"
dthslng.br$dm12 <- bdi$dm12[dthslng.br$bdirow]
dthslng.br$adm1npre <- bdi$adm1npre[dthslng.br$bdirow]

nrow(dthslng.br)
table(dthslng.br$exrebel)


# Probability of surviving conditional on age1972, father's ed, joining, and home province fixed effects.
bdi.s$brosever <- dthslng$brosever[match(bdi.s$casenum, dthslng$casenum)]
bdi.s$survived <- 1
dthslng.br$survived <- 0

attrData <- rbind(	bdi.s[,c(	"casenum","survived","age1972",
								"dm12","exrebel","adm1npre","pweight","brosever","adm2id")],
					dthslng.br[,c(	"casenum","survived","age1972","dm12",
	
									"exrebel","adm1npre","pweight","brosever","adm2id")])

attrData$died <- 1 - attrData$survived

attrData$dm12[attrData$dm12 == -99 ] <- 0
attrData$dm12[is.na(attrData$dm12)] <- 0

attrit.analysis1 <- wlsClus("died",
							"dm12 + exrebel + dm12:exrebel + as.factor(adm1npre)",
							attrData,
							"pweight",
							"adm2id")


attrit.analysis2 <- wlsClus("died",
							"dm12 + age1972 + exrebel + brosever+ age1972:exrebel + dm12:exrebel +brosever:exrebel + as.factor(adm1npre)",
							attrData,
							"pweight",
							"adm2id")
omit.coef.arg.attrit <- setdiff(names(coef(attrit.analysis2)),c(	"(Intercept)",
																"dm12",
																"exrebel",
																"brosever",
																"age1972",
																"age1972:exrebel",
																"dm12:exrebel",
																"exrebel:brosever"))
sink("tables/attrition.tex")
apsrtable(	attrit.analysis1,
			attrit.analysis2,
			omitcoef= omit.coef.arg.attrit,
			digits=3,
			coef.names=c(	"(Constant)",
							"Father's ed. (yrs.)",
							"Rebel participant",
							"Father's ed. X rebel",
							"Age",
							"No. brothers",
							"Age X rebel",
							"No. brothers X rebel"
							),
			stars="default",
			model.names=NULL,
			caption="Death as a function of father's education, rebel participation, and control variables",
			notes=list("Weighted least squares with standard errors accounting for clustering at the commune level.",
						"All models account for pre-war province fixed effects.",
						stars.note))
sink()


fitSurv <- bayesglm(survived~	age1972+I(age1972^2)+
								dm12+I(dm12^2)+
								pweight+I(pweight^2)+
								brosever+I(brosever^2)+
								age1972:dm12+
								age1972:pweight+
								age1972:brosever+
								dm12:pweight+
								dm12:brosever+
								brosever:pweight+
								exrebel+
								exrebel:age1972+
								exrebel:dm12+
								exrebel:brosever+
								exrebel:pweight+
								as.factor(adm1npre),
				data=attrData,
				family="binomial",
				prior.scale=1)  
sink("tables/surv-model.tex")
apsrtable(	fitSurv,
			omitcoef= omit.coef.arg.attrit, 
			digits=3,
			coef.names=c(	"(Constant)",
							"Age",
							"Age sq.",
							"Father's ed.",
							"Father's ed. sq.",
							"Sampling weight",
							"Sampling weight sq.",
							"No. brothers",
							"No. brothers sq.",
							"Age X father's ed.",
							"Age X sampling weight",
							"Age X No. brothers",
							"Father's ed. X sampling weight",
							"Father's ed. X no. brothers",
							"Sampling weight X no. brothers",
							"Rebel participant",
							"Rebel X Age",
							"Rebel X father's ed.",
							"Rebel X no. brothers",
							"Rebel X sampling weight"),
			stars="default",
			caption="Modeling survival probability (penalized logistic regression coefficients)")
sink()

bdi.s$psurv <- predict(fitSurv, 
							type="response",
							newdata=bdi.s[,c(	"age1972","dm12","exrebel",
												"adm1npre","pweight","brosever")])

bdi.s$logitsurv <- log(bdi.s$psurv/(1-bdi.s$psurv))

############################
## Basic results of education and participation
############################

fitbasic1.fe <- wlsClus("exrebel",
						"age1972+I(age1972^2)+dm11 + as.factor(adm1npre)",
						bdi.s,
						"pweight",
						"adm2id")

fitbasic2.fe <- wlsClus("exrebel",
						"dm11+as.factor(age1972)+as.factor(adm1npre)",
						bdi.s,
						"pweight",
						"adm2id")

fitbasic3.fe <- wlsClus("exrebel",
						"age1972+I(age1972^2)+dm12+as.factor(adm1npre)",
						bdi.s,
						"pweight",
						"adm2id")

fitbasic4.fe <- wlsClus("exrebel",
						"dm12+as.factor(age1972)+as.factor(adm1npre)",
						bdi.s,
						"pweight",
						"adm2id")
						
fitbasic5.fe.attr <- wlsClus("exrebel",
						"age1972+I(age1972^2)+dm11+ logitsurv +as.factor(adm1npre)",
						bdi.s,
						"pweight",
						"adm2id")

fitbasic6.fe.attr <- wlsClus("exrebel",
						"age1972+I(age1972^2)+dm12+ logitsurv +as.factor(adm1npre)",
						bdi.s,
						"pweight",
						"adm2id")

fitbasic7.fe.attr.both <- wlsClus("exrebel",
						"age1972+I(age1972^2)+ dm11+ dm12 +  logitsurv +as.factor(adm1npre)",
						bdi.s,
						"pweight",
						"adm2id")

omit.coef.arg1 <- names(coef(fitbasic2.fe))[-c(1,2)]

length(table(bdi.s$adm2id))

sink("tables/partbasic.tex")
apsrtable(	fitbasic1.fe,
			fitbasic2.fe,
			fitbasic3.fe,
			fitbasic4.fe,
			fitbasic5.fe.attr,
			fitbasic6.fe.attr,
			fitbasic7.fe.attr.both,
			omitcoef=omit.coef.arg1,
			digits=3,
			coef.names=c(	"(Constant)",
							"Age in 1972",
							"Age in 1972 sq.",
							"Subject's ed. (yrs.)",
							"Father's ed. (yrs.)",
							"Logit survival pr."),
			stars="default",
			caption="Participation in revolt as a function of own and father's education",
			notes=list("Weighted least squares estimates.",
						"Standard errors account for clustering at the commune level.",
						"(Number of clusters is 65.)",
						"Dependent variable is participation in armed revolt (0,1).",
						"All models control for pre-war province fixed effects.",
						"Models 2 and 4 use age-specific fixed effects.",
						stars.note))
sink()


############################
# Analysis that compares to Hutu and Tutsi males
############################

# Again, using the code below you can toggle the age cut offs on and off
bdi.ed <- subset(bdi, dm1==0&dm2>26&dm2<57)
#bdi.ed <- subset(bdi, dm1==0)

bdi.ed$dm12[bdi.ed$dm12==-99] <- 0
bdi.ed$dm12[is.na(bdi.ed$dm12)] <- 0
bdi.ed$job1993[is.na(bdi.ed$job1993)] <- 0
bdi.ed$job1993b[is.na(bdi.ed$job1993b)] <- -1
bdi.ed$job2007[is.na(bdi.ed$job2007)] <- 0
bdi.ed$age1972 <- bdi.ed$dm2 - 35
bdi.ed$brosever <- dthslng$brosever[match(bdi.ed$casenum, dthslng$casenum)]
bdi.ed$psurv <- predict(fitSurv, 
							type="response",
							newdata=bdi.ed[,c(	"age1972","dm12","exrebel",
												"adm1npre","pweight","brosever")])
bdi.ed$logitsurv <- log(bdi.ed$psurv/(1-bdi.ed$psurv))

fitedcorall.fe <- wlsClus("dm11",
                        "age1972+I(age1972^2) +dm12+ hutu +dm12:hutu+age1972:hutu+I(age1972^2):hutu +logitsurv+as.factor(adm1npre)",
                        bdi.ed,
                        "pweight",
                        "adm2id")

fitjob93corrall.fe <- wlsClus("job1993b",
                            "age1972+I(age1972^2)+ dm12+hutu +dm12:hutu+age1972:hutu+I(age1972^2):hutu+logitsurv+as.factor(adm1npre)",
                            bdi.ed,
                            "pweight",
                            "adm2id")

omit.coef.arg2 <- setdiff(names(coef(fitedcorall.fe)),
						c("(Intercept)",
						"dm12",
						"age1972","I(age1972^2)",
						"hutu",
						"dm12:hutu",
						"age1972:hutu",
						"I(age1972^2):hutu"))

sink("tables/tutsi-placebo-check.tex")
apsrtable( fitedcorall.fe,
           fitjob93corrall.fe,
           omitcoef=omit.coef.arg2,
           digits=3,
           coef.names=c( "(Constant)",
						 "Age in 1972",
						 "Age in 1972 sq.",
                         "Father's ed. (yrs.)",
                         "Hutu",
                         "Father's ed.X Hutu",
                         "Age X Hutu",
                         "Age sq. X Hutu"),
           stars="default",
           caption="Subject's education and occupational ranking as a function of father's education (all-ethnicity sample)",
           model.names=c("Pre-war ed.","Pre-war occ. ranking"),
           notes=list("Weighted least squares estimates.",
                      "Standard errors account for clustering at the commune level.",
                      "(Number of clusters is 66.)",
                      "D.V. in Model 1 is respondent's own level of education before the war (0-18).",
                      "D.V. in Model 2 is respondent's own occupational ranking before the war (-1,0,1).",
                      "Sample includes both Hutu and Tutsi male respondents.",
                      "All models control for pre-war province fixed effects and survival probability.",
                      stars.note))
sink()

############################
# Ranks and Jobs Obtained Check
############################

bdi.ranks.df1 <- subset(bdi.s, exrebel==1&!is.na(df1)&df1>0)
bdi.ranks.df2 <- subset(bdi.s, exrebel==1&!is.na(df2)&df2>0)
bdi.ranks.df1$inrank <- 15-bdi.ranks.df1$df1
bdi.ranks.df2$outrank <- 15-bdi.ranks.df2$df2


jobs1.fe <- wlsClus("job1993b",
                    "age1972+I(age1972^2)+dm11+as.factor(adm1npre)+ logitsurv",
                    bdi.s,
                    "pweight",
                    "adm2id")

jobs1f.fe <- wlsClus("job1993b",
                     "age1972+I(age1972^2)+dm12+as.factor(adm1npre)+ logitsurv",
                     bdi.s,
                     "pweight",
                     "adm2id")

jobs2.fe <- wlsClus("job2007b",
                    "age1972+I(age1972^2)+dm11+as.factor(adm1npre)+ logitsurv",
                    bdi.s,
                    "pweight",
                    "adm2id")

jobs2f.fe <- wlsClus("job2007b",
                     "age1972+I(age1972^2)+dm12+as.factor(adm1npre)+ logitsurv",
                     bdi.s,
                     "pweight",
                     "adm2id")


ranks1.fe <- wlsClus("inrank",
                     "age1972+I(age1972^2)+dm11+as.factor(adm1npre)+ logitsurv",
                     bdi.ranks.df1,
                     "pweight",
                     "adm2id")

ranks1f.fe <- wlsClus("inrank",
                     "age1972+I(age1972^2)+dm12+as.factor(adm1npre)+ logitsurv",
                     bdi.ranks.df1,
                     "pweight",
                     "adm2id")

ranks2.fe <- wlsClus("outrank",
                     "age1972+I(age1972^2)+dm11+as.factor(adm1npre)+ logitsurv",
                     bdi.ranks.df2,
                     "pweight",
                     "adm2id")


ranks2f.fe <- wlsClus("outrank",
                     "age1972+I(age1972^2)+dm12+as.factor(adm1npre)+ logitsurv",
                     bdi.ranks.df2,
                     "pweight",
                     "adm2id")

omit.coef.arg3 <- names(coef(jobs1.fe))[-c(1,2,3,4)]

sink("tables/jobranks.tex")
apsrtable( jobs1.fe,
           jobs1f.fe,
           jobs2.fe,
           jobs2f.fe,
          omitcoef=omit.coef.arg3,
           digits=3,
            coef.names=c(	"(Constant)",
                          "Age in 1972",
                          "Age in 1972 sq.",
                          "Subject's Ed. (yrs.)",
                          "Father's ed. (yrs.)"),
           stars="default",
           caption="Occupational ranking as a function of own and father's education",
           model.names=c(	"Pre-war occ.","Pre-war occ.","Post-war occ.","Post-war occ."),
           notes=list("Weighted least squares estimates.",
                      "Standard errors account for clustering at the commune level.",
                      "(Number of clusters is 65.)",
                      "D.V. is occupational rank (-1,0,1).",
                      "All models control for pre-war province fixed effects and survival probability.",
                      stars.note))
length(table(bdi.ranks.df1$adm2id))
length(table(bdi.ranks.df2$adm2id))
apsrtable( ranks1.fe,
           ranks1f.fe,
           ranks2.fe,
           ranks2f.fe,
           omitcoef=omit.coef.arg3,
           digits=3,
            coef.names=c(	"(Constant)",
                          "Age in 1972",
                          "Age in 1972 sq.",
                          "Subject's Ed. (yrs.)",
                          "Father's ed. (yrs.)"),
           stars="default",
           caption="Military ranks as a function of own and father's education (rebels only)",
           model.names=c(	"Incoming rank","Incoming rank",
           					"Outgoing rank","Outgoing rank"),
           notes=list("Weighted least squares estimates.",
                      "Standard errors account for clustering at the commune level.",
                      "(Number of clusters is 65.)",                      
                      "D.V. is military rank (1 simple soldier - 15 general).",
                      "All models control for pre-war province fixed effects and survival probability.",
                      stars.note))
sink()

############################
# Historical natural experiment
############################

#Education as function of father's ed, by age categories in 1972

fitedcor.fe <- wlsClus("dm11",
						"age1972+I(age1972^2)+dm12+as.factor(adm1npre)+ logitsurv",
						bdi.s,
						"pweight",
						"adm2id")

fitedcor12.fe <- wlsClus("dm11",
						"age1972+I(age1972^2)+dm12+I(age1972>12)+dm12:I(age1972>12)+as.factor(adm1npre)+ logitsurv",
						bdi.s,
						"pweight",
						"adm2id")

fitedcor16.fe <- wlsClus("dm11",
						"age1972+I(age1972^2)+dm12+I(age1972>16)+dm12:I(age1972>16)+as.factor(adm1npre)+ logitsurv",
						bdi.s,
						"pweight",
						"adm2id")

omit.coef.arg <- names(coef(fitedcor.fe))[-c(1,2,3,4)]

sink("tables/fitedcor.tex")
apsrtable(	fitedcor.fe,
			fitedcor12.fe,
			fitedcor16.fe,
			omitcoef=omit.coef.arg,
			digits=3,
			coef.names=c(	"(Constant)",
							"Age in 1972",
							"Age in 1972 sq.",
							"Father's ed. (yrs.)",
							"Age in 1972 > 12",
							"Father's ed. (yrs.) X Age in 1972 > 12",
							"Age in 1972 > 16",
							"Father's ed. (yrs.) X Age in 1972 > 16"),
			stars="default",
			caption="Education as a function of father's education, by age categories in 1972",
			notes=list("Weighted least squares with standard errors accounting for clustering at the commune level.",
						"Number of clusters is 65.",
						"Dependent variable is respondent's years of education before the war (0-18).",
						"All models control for pre-war province fixed effects and survival probability.",
						stars.note))
sink()

#Participation as function of father's ed, by age cohorts in 72

fitpart.fe <- wlsClus("exrebel",
						"age1972+I(age1972^2)+dm12+as.factor(adm1npre)+ logitsurv",
						bdi.s,
						"pweight",
						"adm2id")

fitpart12.fe <- wlsClus("exrebel",
						"age1972+I(age1972^2)+dm12+I(age1972>12)+dm12:I(age1972>12)+as.factor(adm1npre)+ logitsurv",
						bdi.s,
						"pweight",
						"adm2id")

fitpart16.fe <- wlsClus("exrebel",
						"age1972+I(age1972^2)+dm12+I(age1972>16)+dm12:I(age1972>16)+as.factor(adm1npre)+ logitsurv",
						bdi.s,
						"pweight",
						"adm2id")

fitpart16.fe.unrest <- lm(exrebel~dm12+age1972+I(age1972^2)+I(age1972>16)+dm12:I(age1972>16)+as.factor(adm1npre)+ logitsurv,
							bdi.s,
							weights=bdi.s$pweight)
vcov.fitpart16.fe.unrest <- robust.se(fitpart16.fe.unrest, bdi.s$adm2id)[[1]]
fitpart16.fe.rest <- lm(exrebel~age1972+I(age1972^2)+I(age1972>16)+as.factor(adm1npre)+logitsurv,
							bdi.s,
							weights=bdi.s$pweight)
vcov.fitpart16.fe.rest <- robust.se(fitpart16.fe.rest, bdi.s$adm2id)[[1]]
waldtest(fitpart16.fe.unrest,fitpart16.fe.rest, vcov=vcov.fitpart16.fe.unrest)

omit.coef.arg <- names(coef(fitpart.fe))[-c(1,2,3,4)]

sink("tables/part.tex")
apsrtable(	fitpart.fe,
			fitpart12.fe,
			fitpart16.fe,			
			omitcoef=omit.coef.arg,
			digits=3,
			coef.names=c(	"(Constant)",
							"Age in 1972",
							"Age in 1972 sq.",
							"Father's ed. (yrs.)",
							"Age in 1972 > 12",
							"Father's ed. (yrs.) X Age in 1972 > 12",
							"Age in 1972 > 16",
							"Father's ed. (yrs.) X Age in 1972 > 16"),
			stars="default",
			caption="Participation in revolt as a function of father's education, by age categories in 1972",
			notes=list("Weighted least squares estimates.",
						"Standard errors account for clustering at the commune level.",
						"Number of clusters is 65.",
						"Dependent variable is participation in armed revolt (0,1).",
						"All models control for pre-war province fixed effects and survival probability.",
						stars.note,
						"$^\\ddag$ jointly significant at $p<.10$."))
sink()

bdi.s$dm20[is.na(bdi.s$dm20)] <- 1

fitdadalive.fe <- wlsClus("dm20",
						"age1972+I(age1972^2)+dm12+as.factor(adm1npre)+ logitsurv",
						bdi.s,
						"pweight",
						"adm2id")

fitdadalive12.fe <- wlsClus("dm20",
						"age1972+I(age1972^2)+dm12+I(age1972>12)+dm12:I(age1972>12)+as.factor(adm1npre)+ logitsurv",
						bdi.s,
						"pweight",
						"adm2id")

fitdadalive16.fe <- wlsClus("dm20",
						"age1972+I(age1972^2)+dm12+I(age1972>16)+dm12:I(age1972>16)+as.factor(adm1npre)+ logitsurv",
						bdi.s,
						"pweight",
						"adm2id")

sink("tables/dadalive.tex")
apsrtable(	fitdadalive.fe,
			fitdadalive12.fe,
			fitdadalive16.fe,	
			omitcoef=omit.coef.arg,
			digits=3,
			coef.names=c(	"(Constant)",
							"Age in 1972",
							"Age in 1972 sq.",
							"Father's ed. (yrs.)",
							"Age in 1972 > 12",
							"Father's ed. (yrs.) X Age in 1972 > 12",
							"Age in 1972 > 16",
							"Father's ed. (yrs.) X Age in 1972 > 16"),
			stars="default",
			caption="Father alive in 1993 as a function of father's education, by age categories in 1972",
			notes=list("Weighted least squares estimates.",
						"Standard errors account for clustering at the commune level.",
						"Dependent variable is father alive in 1993 (0,1).",
						"All models control for pre-war province fixed effects and survival probability.",
						stars.note,
						"$^\\ddag$ jointly significant at $p<.10$."))
sink()

############################
# Alternative explanations (appendix)
############################

# Revenge Hypothesis Test
exrebel <- subset(bdi.s, exrebel==1)

revenge <- wlsClus("revenge",
                   "age1972+I(age1972^2)+ dm12 + logitsurv +as.factor(adm1npre)",
                   exrebel,
                   "pweight",
                   "adm2id")

revenge.12.fe <- wlsClus("revenge",
                            "age1972+I(age1972^2)+dm12+I(age1972>12)+dm12:I(age1972>12)+as.factor(adm1npre)+ logitsurv",
                            exrebel,
                            "pweight",
                            "adm2id")

revenge.16.fe <- wlsClus("revenge",
                            "age1972+I(age1972^2)+dm12+I(age1972>16)+dm12:I(age1972>16)+as.factor(adm1npre)+ logitsurv",
                            exrebel,
                            "pweight",
                            "adm2id")

length(table(exrebel$adm2id))
sink("tables/revenge.tex")
apsrtable(  revenge, revenge.12.fe,revenge.16.fe, 
            omitcoef= omit.coef.arg,
            digits=3,
            coef.names=c(  "(Constant)",
                           "Age in 1972",
                           "Age in 1972 sq.",
                           "Father's ed. (yrs.)",
                           "Age in 1972 > 12",
                           "Father's ed. (yrs.) X Age in 1972 > 12",
                           "Age in 1972 > 16",
                           "Father's ed. (yrs.) X Age in 1972 > 16"),
            stars="default",
            caption="Participation for ``revenge'' as a function of father's education",
            notes=list("Weighted least squares estimates.",
                       "Standard errors account for clustering at the commune level.",
                       "(Number of clusters is 65.)",
                       "Dependent variable is participation in armed revolt (0,1).",
                       "All models control for pre-war province fixed effects and survival probability.",
                       stars.note))
sink()


# Selective recruitment

bdi.s.c <- subset(bdi.s, exrebel==0)
bdi.s.c$pc1[is.na(bdi.s.c$pc1)] <- 0

fitselrec.own.fe.attr <- wlsClus("pc1",
						"age1972+I(age1972^2)+dm11+ logitsurv +as.factor(adm1npre)",
						bdi.s.c,
						"pweight",
						"adm2id")

fitselrec.fat.fe.attr <- wlsClus("pc1",
						"age1972+I(age1972^2)+dm12+ logitsurv +as.factor(adm1npre)",
						bdi.s.c,
						"pweight",
						"adm2id")

omit.coef.arg.selrec <- names(coef(fitselrec.fat.fe.attr))[-c(1,2,3,4)]
length(table(bdi.s.c$adm2id))
						
sink("tables/selrecruit.tex")
apsrtable(	fitselrec.own.fe.attr,
			fitselrec.fat.fe.attr,
			omitcoef=omit.coef.arg.selrec,
			digits=3,
			coef.names=c(	"(Constant)",
							"Age in 1972",
							"Age in 1972 sq.",
							"Subject's ed. (yrs.)",
							"Father's ed. (yrs.)"),
			stars="default",
			caption="Whether civilian considered joining, as a function of own and father's education",
			notes=list("Weighted least squares estimates.",
						"Standard errors account for clustering at the commune level.",
						"(Number of clusters is 61.)",
						"Dependent variable is whether civilian considered joining (0=no,1=yes).",
						"All models control for pre-war province fixed effects and survival probability.",
						stars.note))
sink()

# Household opportunity costs

fitbasic3.fe.bros.attrit <- wlsClus("exrebel",
						"age1972+I(age1972^2)+dm11 + dm12 + brosever + logitsurv + as.factor(adm1npre)",
						bdi.s,
						"pweight",
						"adm2id")

fitbasic3.fe.wealth.attrit <- wlsClus("exrebel",
						"age1972+I(age1972^2)+dm11 + dm12 + wealth + logitsurv + as.factor(adm1npre)",
						bdi.s,
						"pweight",
						"adm2id")

fitbasic3.fe.bros.wealth.attrit <- wlsClus("exrebel",
						"age1972+I(age1972^2)+dm11 + dm12 +brosever + wealth + logitsurv + as.factor(adm1npre)",
						bdi.s,
						"pweight",
						"adm2id")

fitbasic3.fe.bros.ed <- wlsClus("brosever",
						"age1972+I(age1972^2)+dm11 + dm12 + logitsurv + as.factor(adm1npre)",
						bdi.s,
						"pweight",
						"adm2id")

fitbasic3.fe.wealth.ed <- wlsClus("wealth",
						"age1972+I(age1972^2)+dm11 + dm12 + logitsurv + as.factor(adm1npre)",
						bdi.s,
						"pweight",
						"adm2id")

fitbasic3.fe.bros <- wlsClus("brosever",
						"age1972+I(age1972^2)+logitsurv + as.factor(adm1npre)",
						bdi.s,
						"pweight",
						"adm2id")

fitbasic3.fe.wealth <- wlsClus("wealth",
						"age1972+I(age1972^2)+logitsurv + as.factor(adm1npre)",
						bdi.s,
						"pweight",
						"adm2id")

						
sink("tables/partbasic-forappendix-bros-wealth.tex")
apsrtable(	fitbasic3.fe.bros.ed,
			fitbasic3.fe.bros.attrit,
			fitbasic3.fe.wealth.ed,
			fitbasic3.fe.wealth.attrit,
			fitbasic3.fe.bros.wealth.attrit,
			omitcoef=omit.coef.arg,
			digits=3,
			coef.names=c(	"(Constant)",
							"Age in 1972",
							"Age in 1972 sq.",
							"Subj. ed.",
							"Father's ed.",
							"Number of brothers in 1993",
							"Wealth index"),
			stars="default",
			caption="Evaluating whether factors related to household opportunity costs confound estimates of the relationship between education and participation",
			model.names=c("DV=No. bros.","DV=Rebel","DV=Wealth","DV=Rebel","DV=Rebel"),
			notes=list("Weighted least squares estimates.",
						"Standard errors account for clustering at the commune level.",
						"(Number of clusters is 65.)",						
						"All models control for pre-war province fixed effects and survival probability.",
						"In results columns 1 and 3, subj. ed. and father's ed. are jointly significant with p < .05.",
						stars.note))

anova(fitbasic3.fe.bros, fitbasic3.fe.bros.ed)
anova(fitbasic3.fe.wealth, fitbasic3.fe.wealth.ed)

sink()


# Network resources

bdi.s$friends <- ifelse(bdi.s$cm4!=1&!is.na(bdi.s$cm4), 1, 0)
bdi.s$rebeltold <- apply(cbind(bdi.s$pf17cndd, bdi.s$pf17fnl),
						1,
						function(x){ifelse(max(x)>0&!is.na(max(x)), 1, 0)})


fitbasic3.fe.radio <- wlsClus("exrebel",
						"age1972+I(age1972^2)+dm11+ dm12 + rp7 + logitsurv + as.factor(adm1npre)",
						bdi.s,
						"pweight",
						"adm2id")

fitbasic3.fe.rebeltold <- wlsClus("exrebel",
						"age1972+I(age1972^2)+dm11+ dm12 + rebeltold + logitsurv + as.factor(adm1npre)",
						bdi.s,
						"pweight",
						"adm2id")

fitbasic3.fe.friends <- wlsClus("exrebel",
						"age1972+I(age1972^2)+dm11+ dm12 + friends + logitsurv + as.factor(adm1npre)",
						bdi.s,
						"pweight",
						"adm2id")

fitbasic3.fe.radio.ed <- wlsClus("rp7",
						"age1972+I(age1972^2)+dm11 + dm12  + logitsurv + as.factor(adm1npre)",
						bdi.s,
						"pweight",
						"adm2id")

fitbasic3.fe.rebeltold.ed <- wlsClus("rebeltold",
						"age1972+I(age1972^2)+dm11 + dm12+ logitsurv + as.factor(adm1npre)",
						bdi.s,
						"pweight",
						"adm2id")


fitbasic3.fe.friends.ed <- wlsClus("friends",
						"age1972+I(age1972^2)+dm11 + dm12 + logitsurv + as.factor(adm1npre)",
						bdi.s,
						"pweight",
						"adm2id")


fitbasic3.fe.radio.x <- wlsClus("rp7",
						"age1972+I(age1972^2)+logitsurv + as.factor(adm1npre)",
						bdi.s,
						"pweight",
						"adm2id")

fitbasic3.fe.rebeltold.x <- wlsClus("rebeltold",
						"age1972+I(age1972^2)+logitsurv + as.factor(adm1npre)",
						bdi.s,
						"pweight",
						"adm2id")


fitbasic3.fe.friends.x <- wlsClus("friends",
						"age1972+I(age1972^2)+logitsurv + as.factor(adm1npre)",
						bdi.s,
						"pweight",
						"adm2id")

sink("tables/partbasic-forappendix-networks.tex")
apsrtable(	fitbasic3.fe.radio.ed,
			fitbasic3.fe.radio,
			fitbasic3.fe.rebeltold.ed,
			fitbasic3.fe.rebeltold,
			fitbasic3.fe.friends.ed,
			fitbasic3.fe.friends,
			digits=3,
			omitcoef=omit.coef.arg,
			coef.names=c(	"(Constant)",
							"Age in 1972",
							"Age in 1972 sq.",
							"Subj. ed.",
							"Father's ed.",
							"HH had radio pre-war",
							"Informed by faction pre-war",
							"Spent time w. friends pre-war"),
			stars="default",
			caption="Evaluating whether collective action resources confound estimates of the relationship between education and participation",
			model.names=c(	"DV=Radio","DV=Rebel",
							"DV=Informed","DV=Rebel",
							"DV=Friends","DV=Rebel"),
			notes=list("Weighted least squares estimates.",
						"Standard errors account for clustering at the commune level.",
						"(Number of clusters is 65.)",						
						"All models control for pre-war province fixed effects and survival probability.",
						"In results columns 1, 3, and 5, subj. ed. and father's ed. are jointly significant with p < .05.",
						stars.note))

anova(fitbasic3.fe.radio.x, fitbasic3.fe.radio.ed)
anova(fitbasic3.fe.rebeltold.x, fitbasic3.fe.rebeltold.ed)
anova(fitbasic3.fe.friends.x, fitbasic3.fe.friends.ed)

sink()

# Political socialization

bdi.s$support93_uprona <- ifelse(bdi.s$dm26==1&!is.na(bdi.s$dm26), 1, 0)
bdi.s$support93_frodebu <- ifelse(bdi.s$dm26==2&!is.na(bdi.s$dm26), 1, 0)
bdi.s$support93_none <- ifelse(bdi.s$dm26==3&!is.na(bdi.s$dm26), 1, 0)
bdi.s$support93_other <- ifelse(bdi.s$dm26==4&!is.na(bdi.s$dm26), 1, 0)

fitbasic3.fe.support93uprona <- wlsClus("support93_uprona",
						"age1972+I(age1972^2)+dm11 + dm12 + logitsurv + as.factor(adm1npre)",
						bdi.s,
						"pweight",
						"adm2id")


fitbasic3.fe.support93uprona.x <- wlsClus("support93_uprona",
						"age1972+I(age1972^2)+ logitsurv + as.factor(adm1npre)",
						bdi.s,
						"pweight",
						"adm2id")

fitbasic3.fe.support93frodebu <- wlsClus("support93_frodebu",
						"age1972+I(age1972^2)+dm11 + dm12 + logitsurv + as.factor(adm1npre)",
						bdi.s,
						"pweight",
						"adm2id")

fitbasic3.fe.support93frodebu.x <- wlsClus("support93_frodebu",
						"age1972+I(age1972^2)+ logitsurv + as.factor(adm1npre)",
						bdi.s,
						"pweight",
						"adm2id")

fitbasic3.fe.support93none <- wlsClus("support93_none",
						"age1972+I(age1972^2)+dm11 + dm12 + logitsurv + as.factor(adm1npre)",
						bdi.s,
						"pweight",
						"adm2id")

fitbasic3.fe.support93none.x <- wlsClus("support93_none",
						"age1972+I(age1972^2) + logitsurv + as.factor(adm1npre)",
						bdi.s,
						"pweight",
						"adm2id")

fitbasic3.fe.support93other <- wlsClus("support93_other",
						"age1972+I(age1972^2)+dm11 + dm12 + logitsurv + as.factor(adm1npre)",
						bdi.s,
						"pweight",
						"adm2id")

fitbasic3.fe.support93other.x <- wlsClus("support93_other",
						"age1972+I(age1972^2)+ logitsurv + as.factor(adm1npre)",
						bdi.s,
						"pweight",
						"adm2id")


fitbasic3.fe.rebel.support <- wlsClus("exrebel",
						"age1972+I(age1972^2)+dm11+ dm12 + support93_frodebu + support93_none + support93_other + logitsurv + as.factor(adm1npre)",
						bdi.s,
						"pweight",
						"adm2id")


sink("tables/partbasic-forappendix-politicalsocialization.tex")
apsrtable(	fitbasic3.fe.support93uprona,
			fitbasic3.fe.support93frodebu,
			fitbasic3.fe.support93none,
			fitbasic3.fe.support93other,
			fitbasic3.fe.rebel.support,
			digits=3,
			omitcoef=omit.coef.arg,
			coef.names=c(	"(Constant)",
							"Age in 1972",
							"Age in 1972 sq.",
							"Subj. ed.",
							"Father's ed.",
							"1993 support for FRODEBU",
							"1993 support for none",
							"1993 support for other"),
			stars="default",
			caption="Evaluating whether political socialization confounds estimates of the relationship between education and participation",
			model.names=c(	"DV=Sup. UPRONA","DV=Sup. FRODEBU",
							"DV=Sup. none","DV=Sup. other","DV=Rebel"),
			notes=list("Weighted least squares estimates.",
						"Standard errors account for clustering at the commune level.",
						"(Number of clusters is 65.)",						
						"All models control for pre-war province fixed effects and survival probability.",
						"In results columns 1, 2, and 3, subj. ed. and father's ed. are jointly significant with p < .05.",
						stars.note))

anova(fitbasic3.fe.support93uprona, fitbasic3.fe.support93uprona.x)
anova(fitbasic3.fe.support93frodebu, fitbasic3.fe.support93frodebu.x)
anova(fitbasic3.fe.support93none, fitbasic3.fe.support93none.x)
anova(fitbasic3.fe.support93other, fitbasic3.fe.support93other.x)

sink()

write.csv(bdi.s, file="bdi_s.csv", row.names=F)
write.csv(subset(bdi.ed, hutu==0), file="bdi_ed_tutsi.csv", row.names=F)

############################
# Effects of father's education levels by age (graph)
############################

scalePar <- TRUE
alphaPar <- .25
locfit1 <- locfit(exrebel~age1972, 
					data=subset(bdi.s, fathered==1),
					family="binomial",
					weights=subset(bdi.s, fathered==1)$pweight,
					deg=2,
					scale=scalePar,
					alpha=alphaPar)
locfit0 <- locfit(exrebel~age1972, 
					data=subset(bdi.s, fathered==0),
					family="binomial",
					weights=subset(bdi.s, fathered==0)$pweight,
					deg=2,
					scale=scalePar,
					alpha=alphaPar)
ageRange <- -8:21
locpred1 <- preplot(locfit1, newdata=ageRange, band="local")
locpred0 <- preplot(locfit0, newdata=ageRange, band="local")
invlogit <- function(x)((1+exp(-x))^(-1))

pdf(file="figures/agepartint.pdf", width=8, height=5)
par(mfrow=c(1,1))
plot(ageRange, 
		invlogit(locpred1[[2]]),
		type="n",
		ylim=c(-0.005,.035),
		axes=F,
		xlab="Age in 1972",
		ylab="Probability of joining")

polygon(c(ageRange, rev(ageRange)),
		c(invlogit(locpred1[[2]]+1.96* locpred1[[3]]),
		rev(invlogit(locpred1[[2]]-1.96* locpred1[[3]]))),
		col=gray(.25, alpha=.5),
		border=NA)
points(ageRange, 
		invlogit(locpred1[[2]]),
		type="l",
		lwd=2)

polygon(c(ageRange, rev(ageRange)),
		c(invlogit(locpred0[[2]]+1.96* locpred0[[3]]),
		rev(invlogit(locpred0[[2]]-1.96* locpred0[[3]]))),
		col=gray(.5, alpha=.5),
		border=NA)
points(ageRange, 
		invlogit(locpred0[[2]]),
		type="l",
		lty="dashed",
		lwd=2)

axis(1, seq(from=-10, to=20, by=5))
axis(2, c(0:3/100), las=2)
with(subset(bdi.s, fathered==1),
	points(age1972+runif(length(age1972),-.2,.2),
		exrebel*.0375-.0025,
		pch="|"))
with(subset(bdi.s, fathered==0),
	points(age1972+runif(length(age1972),-.2,.2),
		exrebel*.0375-.005,
		pch="o"))

legend(0, .02,
			c("| : Father w/ formal ed.", "o : Father with no formal ed."),
			col=c("black", "black"),
			lty=c("solid","dashed"),
			bty="n",
			lwd=c(2,2),
			cex=1)
dev.off()

############################
# Sensitivity to outmigration opportunities
############################

fitbasic5.fe.attr <- wlsClus("exrebel",
						"age1972+I(age1972^2)+dm11+ logitsurv +as.factor(adm1npre)",
						bdi.s,
						"pweight",
						"adm2id")

fitbasic6.fe.attr <- wlsClus("exrebel",
						"age1972+I(age1972^2)+dm12+ logitsurv +as.factor(adm1npre)",
						bdi.s,
						"pweight",
						"adm2id")

borderProvs <- c(	"Bubanza","Cankuzo","Cibitoke","Kayanza",
					"Kirundo","Makamba","Muyinga","Ngozi",
					"Rutana","Ruyigi","zz_Rwanda","zz_Tanzania")  
bdi.s$borderprov <- 0
bdi.s$borderprov[bdi.s$adm1npre %in% borderProvs] <- 1

table(bdi.s$borderprov)/sum(table(bdi.s$borderprov))


fitbasic5.fe.attr.nobord <- wlsClus("exrebel",
						"age1972+I(age1972^2)+dm11+ logitsurv +as.factor(adm1npre)",
						subset(bdi.s, borderprov==0),
						"pweight",
						"adm2id")
fitbasic6.fe.attr.nobord <- wlsClus("exrebel",
						"age1972+I(age1972^2)+dm12+ logitsurv +as.factor(adm1npre)",
						subset(bdi.s, borderprov==0),
						"pweight",
						"adm2id")

omit.coef.arg.nobord <- names(coef(fitbasic6.fe.attr))[-c(1,2,3,4,5)]

sink("tables/partbasic-sens.tex")
apsrtable(	fitbasic5.fe.attr,
			fitbasic5.fe.attr.nobord,
			fitbasic6.fe.attr,
			fitbasic6.fe.attr.nobord,
			omitcoef=omit.coef.arg.nobord,
			digits=3,
			coef.names=c(	"(Constant)",
							"Age in 1972",
							"Age in 1972 sq.",
							"Subject's ed. (yrs.)",
							"Logit survival pr.",
							"Father's ed. (yrs.)"),
			stars="default",
			caption="Participation in revolt as a function of own and father's education",
			model.names=c("1. Full sample", "2. No border prov.", "3. Full sample", "4. No border prov."),
			notes=list("Weighted least squares estimates.",
						"Standard errors account for clustering at the commune level.",
						"Dependent variable is participation in armed revolt (0,1).",
						"All models control for pre-war province fixed effects.",
						"Models 2 and 4 drop observations in border provinces.",
						stars.note))
sink()

